import re
import time
import os
import requests

# Browser-like User-Agent sent with the search request.
headers = {
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0'
}

# Pieces of the search URL: base + percent-encoded search term + page suffix.
iurl_base = 'https://www.4khd.com/search/'
iurl_name = '%E7%96%AF%E7%8C%AB'
ipage = '/page/2'

# Prefix removed from each extracted link to leave only the content path.
CONTENT_PREFIX = 'https://www.4khd.com/content/'

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 15

# Matches the value of an href attribute that is immediately followed by
# target="_self".  The capture group excludes the surrounding quotes, and
# [^"]* keeps the match from running across other attributes or tags.
LINK_PATTERN = re.compile(r'href="([^"]*)" target="_self"')


def extract_links(html):
    """Return every href value in *html* that is followed by target="_self".

    Values are returned in document order, duplicates included, without the
    surrounding double quotes.
    """
    return LINK_PATTERN.findall(html)


def dedupe_keep_order(items):
    """Return *items* as a list with duplicates removed, first-seen order kept."""
    return list(dict.fromkeys(items))


def strip_content_prefix(urls, prefix=CONTENT_PREFIX):
    """Return *urls* with a leading *prefix* removed from each entry.

    Entries that do not start with *prefix* are returned unchanged.
    """
    return [url[len(prefix):] if url.startswith(prefix) else url for url in urls]


if __name__ == "__main__":
    response = requests.get(
        iurl_base + iurl_name + ipage,
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly on 4xx/5xx rather than parsing an error page.
    response.raise_for_status()
    ihtml = response.text

    idir_name = extract_links(ihtml)
    print(idir_name)

    # De-duplicate while keeping the order in which links appear on the page.
    unique_urls = dedupe_keep_order(idir_name)

    # Keep only the part after https://www.4khd.com/content/.
    trimmed_urls = strip_content_prefix(unique_urls)

    # Print the final result.
    print(trimmed_urls)


